/* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.net.protocols.ftp;
import net.nutch.net.protocols.Response;
import javax.activation.MimetypesFileTypeMap;
// 20040427, xing, disabled for now
//import xing.net.nutch.util.magicfile.*;
import org.apache.commons.net.ftp.FTPFileEntryParser;
import net.nutch.net.protocols.HttpDateFormat;
import net.nutch.net.protocols.http.MiscHttpAccounting;
import net.nutch.net.protocols.http.HttpError;
import net.nutch.util.LogFormatter;
import net.nutch.util.NutchConf;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.net.InetAddress;
import java.net.URL;
import java.io.InputStream;
import java.io.Reader;
import java.io.IOException;
/************************************
* Ftp.java deals with ftp protocol.
*
* Configurable parameters are defined under "FTP properties" section
* in ./conf/nutch-default.xml or similar.
*
* @author John Xing
***********************************/
public class Ftp {
public static final Logger LOG =
LogFormatter.getLogger("net.nutch.net.protocols.ftp.Ftp");
static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
private static final int MAX_REDIRECTS = 5;
int timeout = NutchConf.getInt("ftp.timeout", 10000);
int maxContentLength = NutchConf.getInt("ftp.content.limit",64*1024);
String userName = NutchConf.get("ftp.username", "anonymous");
String passWord = NutchConf.get("ftp.password", "anonymous@example.com");
// typical/default server timeout is 120*1000 millisec.
// better be conservative here
int serverTimeout = NutchConf.getInt("ftp.server.timeout", 60*1000);
// when to have client start anew
long renewalTime = -1;
boolean keepConnection = NutchConf.getBoolean("ftp.keep.connection", false);
boolean followTalk = NutchConf.getBoolean("ftp.follow.talk", false);
// ftp client
Client client = null;
// ftp dir list entry parser
FTPFileEntryParser parser = null;
// 20040412, xing
// the following three: HttpDateFormat, MimetypesFileTypeMap, MagicFile
// are placed in each thread before they are checked out to be thread-safe.
// http date format
HttpDateFormat httpDateFormat = null;
// file name extension to mime-type map
static MimetypesFileTypeMap TYPE_MAP = null;
// 20040427, xing, disabled for now
// // file magic for determining content type
// MagicFile magic = null;
static {
try {
// read mime types from config file
InputStream is =
NutchConf.getConfResourceAsInputStream
(NutchConf.get("mime.types.file"));
if (is == null) {
LOG.warning
("no mime.types.file: won't use url extension for content-type.");
TYPE_MAP = null;
} else {
TYPE_MAP = new MimetypesFileTypeMap(is);
}
if (is != null)
is.close();
} catch (IOException e) {
LOG.log(Level.SEVERE, "Unexpected error", e);
}
}
// constructor
public Ftp() {
this.httpDateFormat = new HttpDateFormat();
}
/** Set the timeout. */
public void setTimeout(int timeout) {
this.timeout = timeout;
}
/** Set the point at which content is truncated. */
public void setMaxContentLength(int length) {this.maxContentLength = length;}
/** Set followTalk */
public void setFollowTalk(boolean followTalk) {
this.followTalk = followTalk;
}
/** Set keepConnection */
public void setKeepConnection(boolean keepConnection) {
this.keepConnection = keepConnection;
}
/**
* Make a single FTP request and return its response in http fashion.
* If <code>addr</code> is not null, that address will be used. If
* <code>httpAccounting</code> is not <code>null</code>, the it's
* fields will be upated during this request.
*/
public Response getRawResponse (URL url)
throws IOException, FtpException {
return new FtpResponse(this, url, null, null, -1);
//return new FtpResponse(this, url, null, null, Http.HTTP_VER_NOTSET);
}
/**
* Mimic a single HTTP request and return its response in http fashion,
* not following * redirects and not translating HTTP errors to exceptions.
* If <code>addr</code> is not null, that address will be used. If
* <code>httpAccounting</code> is not <code>null</code>, it's
* fields will be upated during this request.
*/
public Response getRawResponse(URL url, InetAddress addr,
MiscHttpAccounting httpAccounting,
int httpVersion)
throws IOException, FtpException {
return new FtpResponse(this, url, addr, httpAccounting, httpVersion);
}
/** Returns the content of a URL.
* Follow redirects and translate HTTP errors to exceptions.
*/
public Response getResponse(URL url)
throws IOException, FtpException, HttpError {
int redirects = 0;
URL target = url;
while (true) {
Response response = new FtpResponse(this, target); // make a request
int code = response.getCode();
if (code == 200) { // got a good response
return response; // return it
} else if (code >= 300 && code < 400) { // handle redirect
if (redirects == MAX_REDIRECTS)
throw new FtpException("Too many redirects: " + url);
target = new URL(response.getHeader("Location"));
redirects++;
LOG.fine("redirect to " + target);
} else { // convert to exception
//throw new FtpError(code);
throw new HttpError(code);
}
}
}
protected void finalize () {
try {
if (this.client != null && this.client.isConnected()) {
this.client.logout();
this.client.disconnect();
}
} catch (IOException e) {
// do nothing
}
}
/** For debugging. */
public static void main(String[] args)
throws Exception {
//throws IOException {
int timeout = -1;
int maxContentLength = -1;
String logLevel = "info";
boolean followTalk = false;
boolean keepConnection = false;
boolean dumpContent = false;
String urlString = null;
String usage = "Usage: Ftp [-logLevel level] [-followTalk] [-keepConnection] [-timeout N] [-maxContentLength L] [-dumpContent] url";
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-timeout")) {
timeout = Integer.parseInt(args[++i]) * 1000;
} else if (args[i].equals("-logLevel")) {
logLevel = args[++i];
} else if (args[i].equals("-followTalk")) {
followTalk = true;
} else if (args[i].equals("-keepConnection")) {
keepConnection = true;
} else if (args[i].equals("-maxContentLength")) {
maxContentLength = Integer.parseInt(args[++i]);
} else if (args[i].equals("-dumpContent")) {
dumpContent = true;
} else if (i != args.length-1) {
System.err.println(usage);
System.exit(-1);
} else {
urlString = args[i];
}
}
Ftp ftp = new Ftp();
ftp.setFollowTalk(followTalk);
ftp.setKeepConnection(keepConnection);
if (timeout != -1) // set timeout
ftp.setTimeout(timeout);
if (maxContentLength != -1) // set maxContentLength
ftp.setMaxContentLength(maxContentLength);
// set log level
LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
URL url = new URL(urlString);
Response response = ftp.getRawResponse(url);
int code = response.getCode();
System.err.println("Response code: " + code);
switch (code) {
case 200:
System.err.println("Content-Length: "
+ response.getHeader("Content-Length"));
System.err.println("Content-Type: "
+ response.getHeader("Content-Type"));
System.err.println("Last-Modified: "
+ response.getHeader("Last-Modified"));
if (dumpContent) {
System.out.print(new String(response.getContent()));
}
break;
case 300:
System.err.println("Redirect to: " + response.getHeader("Location"));
break;
case 401:
System.err.println("Unauthorized (anonymous login failed): " + url);
break;
case 404:
System.err.println("File not found: " + url);
break;
default:
System.err.println("Error during ftp");
}
ftp = null;
// should not need this.
//System.exit(-1);
}
}